First, I loaded the tidyverse and the other packages used below, then read in the function Benjamin created for the county-level data.
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.4 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 2.0.1 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dplyr)
library(usmap)
library(ggplot2)
library(maps)
##
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
##
## map
library(mapdata)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
#' Read and tidy one county-level cancer incidence file
#'
#' Reads the CSV at `file_path_name` (skipping its first 8 lines), cleans the
#' column names with janitor, and keeps the county name and the age-adjusted
#' incidence rate, tagged with the supplied state and cancer type.
#'
#' @param file_path_name Path of the CSV file to read.
#' @param state Value stored in the `state` column of every row.
#' @param cancer_type Value stored in the `cancer_type` column of every row.
#' @return A data frame with the columns `state`, `county`, `cancer_type` and
#'   `age_adjusted_incidence_rate` (numeric).
read_and_clean_county_data <- function(file_path_name, state, cancer_type) {
  raw_rates <- read_csv(file_path_name, skip = 8)
  named_rates <- janitor::clean_names(raw_rates)
  rate_columns <- select(
    named_rates,
    county,
    age_adjusted_incidence_rate_rate_note_cases_per_100_000
  )
  labelled_rates <- mutate(
    rate_columns,
    # Removes the final 10 characters of every name, whatever they are;
    # presumably this is the " County(n)" suffix -- TODO confirm against
    # the raw files.
    county = gsub('.{10}$', '', county),
    state = state,
    cancer_type = cancer_type,
    age_adjusted_incidence_rate =
      as.numeric(age_adjusted_incidence_rate_rate_note_cases_per_100_000)
  )
  select(
    labelled_rates,
    state,
    county,
    cancer_type,
    age_adjusted_incidence_rate
  )
}
Next, I loaded the county data sets using this function:
# County-level lung cancer data for PA.
# Source: https://statecancerprofiles.cancer.gov/
pa_county_lc <- read_and_clean_county_data(
  "./data/pa_lung_data_county.csv",
  state = "PA",
  cancer_type = "lung"
) %>%
  # Keep only rows with no missing values.
  drop_na() %>%
  # Discard the first two remaining rows; presumably these are the
  # national and state summary rows -- TODO confirm against the raw file.
  slice(-(1:2))
## New names:
## * `Lower 95% Confidence Interval` -> `Lower 95% Confidence Interval...5`
## * `Upper 95% Confidence Interval` -> `Upper 95% Confidence Interval...6`
## * `Lower 95% Confidence Interval` -> `Lower 95% Confidence Interval...13`
## * `Upper 95% Confidence Interval` -> `Upper 95% Confidence Interval...14`
## Rows: 87 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): County, FIPS, Met Healthy People Objective of ***?, CI*Rank([rank n...
## dbl (7): Age-Adjusted Incidence Rate([rate note]) - cases per 100,000, Lower...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: One or more parsing issues, see `problems()` for details
# County-level lung cancer data for OH.
# Source: https://statecancerprofiles.cancer.gov/
oh_county_lc <- read_and_clean_county_data(
  "./data/oh_lung_data_county.csv",
  state = "OH",
  cancer_type = "lung"
) %>%
  # Keep only rows with no missing values.
  drop_na() %>%
  # Discard the first two remaining rows; presumably these are the
  # national and state summary rows -- TODO confirm against the raw file.
  slice(-(1:2))
## New names:
## * `Lower 95% Confidence Interval` -> `Lower 95% Confidence Interval...5`
## * `Upper 95% Confidence Interval` -> `Upper 95% Confidence Interval...6`
## * `Lower 95% Confidence Interval` -> `Lower 95% Confidence Interval...13`
## * `Upper 95% Confidence Interval` -> `Upper 95% Confidence Interval...14`
## Rows: 108 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): County, FIPS, Met Healthy People Objective of ***?, CI*Rank([rank n...
## dbl (7): Age-Adjusted Incidence Rate([rate note]) - cases per 100,000, Lower...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: One or more parsing issues, see `problems()` for details
# County-level lung cancer data for NY.
# Source: https://statecancerprofiles.cancer.gov/
ny_county_lc <- read_and_clean_county_data(
  "./data/ny_lung_data_county.csv",
  state = "NY",
  cancer_type = "lung"
) %>%
  # Keep only rows with no missing values.
  drop_na() %>%
  # Discard the first two remaining rows; presumably these are the
  # national and state summary rows -- TODO confirm against the raw file.
  slice(-(1:2))
## New names:
## * `Lower 95% Confidence Interval` -> `Lower 95% Confidence Interval...5`
## * `Upper 95% Confidence Interval` -> `Upper 95% Confidence Interval...6`
## * `Lower 95% Confidence Interval` -> `Lower 95% Confidence Interval...13`
## * `Upper 95% Confidence Interval` -> `Upper 95% Confidence Interval...14`
## Rows: 82 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): County, FIPS, Met Healthy People Objective of ***?, CI*Rank([rank n...
## dbl (7): Age-Adjusted Incidence Rate([rate note]) - cases per 100,000, Lower...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: One or more parsing issues, see `problems()` for details
# Open each cleaned county table in the data viewer for a visual check.
view(oh_county_lc)
view(pa_county_lc)
view(ny_county_lc)
Finally, I loaded the state-level lung cancer data:
# State-level lung cancer data for PA.
# Source: https://www.phaim1.health.pa.gov/EDD/
# The file is semicolon-delimited and its header row follows 3 skipped lines.
pa_cancer <- read.csv(
  "./data/pa_state_lung.csv",
  sep = ";",
  header = TRUE,
  skip = 3
) %>%
  janitor::clean_names() %>%
  select(year, age_adjusted_incidence_rate = rate_ratio_result) %>%
  # Keep the result a tibble, as the earlier purrr-based version did.
  as_tibble() %>%
  # Swap the first comma in each value for a point (presumably a decimal
  # comma -- TODO confirm) and then parse every column as numeric.
  mutate(across(
    everything(),
    ~ as.numeric(str_replace(.x, pattern = ",", replacement = "."))
  )) %>%
  add_column(state = "PA")
# State-level lung cancer data for OH.
# Source: https://publicapps.odh.ohio.gov/EDW/DataBrowser/Browse/StateLayoutLockdownCancers
oh_cancer <- read.csv("./data/oh_state_lung.csv", sep = ";") %>%
  janitor::clean_names() %>%
  # Pick the two columns of interest and give them their final names.
  select(
    year = cancer_year_year,
    age_adjusted_incidence_rate = age_adjusted_rate
  ) %>%
  # Swap the first comma in each value for a point, then parse as numeric.
  map_df(str_replace, pattern = ",", replacement = ".") %>%
  map_df(as.numeric) %>%
  add_column(state = "OH") %>%
  distinct() %>%
  # Values that could not be parsed became NA above; drop those rows.
  drop_na()
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
# State-level lung cancer data for NY.
# Source: https://www.health.ny.gov/statistics/cancer/registry/table2/tb2lungnys.htm
ny_cancer <- read.csv("./data/ny_state_lung.csv", skip = 2) %>%
  # Only the first three columns of the file are used.
  select(1:3) %>%
  janitor::clean_names() %>%
  select(
    year = x,
    age_adjusted_incidence_rate = rate_per_100_000_population
  ) %>%
  add_column(state = "NY")
# Merge the county-level lung cancer data with the county FIPS codes.
fips_codes <- read.csv("./data/fips_codes.csv") %>%
  janitor::clean_names() %>%
  filter(state %in% c("NY", "PA", "OH")) %>%
  rename(county = name) %>%
  # Respell this one county name with a period, presumably so it matches
  # the spelling in the county rate tables -- TODO confirm.
  mutate(county = replace(county, county == "St Lawrence", "St. Lawrence"))

# Stack the three state tables, then attach FIPS codes by state and county.
county_lc <- rbind(pa_county_lc, oh_county_lc, ny_county_lc)
lc_fips <- merge(county_lc, fips_codes, by = c("state", "county"))
# Lung cancer incidence maps by county.

#' Draw a county-level map of age-adjusted lung cancer incidence
#'
#' @param data Data frame passed to `plot_usmap()`; it must contain the
#'   `age_adjusted_incidence_rate` column used for the fill.
#' @param states Character vector of state abbreviations to include.
#' @param title Plot title.
#' @return The ggplot object produced by `plot_usmap()` plus the shared
#'   title, fill scale and theme settings.
plot_lc_county_map <- function(data, states, title) {
  plot_usmap(
    regions = "counties",
    include = states,
    data = data,
    values = "age_adjusted_incidence_rate",
    color = "black"
  ) +
    labs(title = title) +
    scale_fill_continuous(
      low = "#FFE9C7",
      high = "#FF0000",
      name = "age_adjusted_incidence_rate",
      # The scale argument is `labels`; spell it out instead of relying on
      # partial argument matching of `label`.
      labels = scales::comma
    ) +
    theme(plot.background = element_rect(), legend.position = "right")
}

ny_map <- plot_lc_county_map(
  lc_fips,
  states = c("NY"),
  title = "New York Age-Adjusted Lung Cancer Rates, 2014-2018"
)
oh_map <- plot_lc_county_map(
  lc_fips,
  states = c("OH"),
  title = "Ohio Age-Adjusted Lung Cancer Rates, 2014-2018"
)
pa_map <- plot_lc_county_map(
  lc_fips,
  states = c("PA"),
  title = "Pennsylvania Age-Adjusted Lung Cancer Rates, 2014-2018"
)
ny_oh_pa_lungcancer <- plot_lc_county_map(
  lc_fips,
  states = c("NY", "OH", "PA"),
  title = "NY, OH, and PA Age-Adjusted Lung Cancer Rates, 2014-2018"
)

# Print the maps.
pa_map
oh_map
ny_map
ny_oh_pa_lungcancer
# Merge the state-level lung cancer data.
state_lc <- rbind(pa_cancer, oh_cancer, ny_cancer)
view(state_lc)

# Spread the rates into one column per state, then label each column as an
# age-adjusted incidence rate (AAIR).
state_lc_wide <- pivot_wider(
  state_lc,
  names_from = state,
  values_from = age_adjusted_incidence_rate
) %>%
  rename(PA_AAIR = PA, NY_AAIR = NY, OH_AAIR = OH)
view(state_lc_wide)
# Interactive line chart of the state-level age-adjusted incidence rates,
# with one line per state plotted against year.
fig <- plot_ly(state_lc_wide, x = ~year) %>%
  add_lines(y = ~PA_AAIR, name = "Pennsylvania") %>%
  add_lines(y = ~NY_AAIR, name = "New York") %>%
  add_lines(y = ~OH_AAIR, name = "Ohio") %>%
  layout(
    title = "Lung Cancer Age-Adjusted Incidence Rates",
    xaxis = list(
      # Buttons for 1-, 5- and 10-year windows, plus the full range.
      rangeselector = list(
        buttons = list(
          list(
            count = 1,
            label = "1 yr",
            step = "year",
            stepmode = "backward"
          ),
          list(
            count = 5,
            label = "5 yr",
            step = "year",
            stepmode = "backward"
          ),
          list(
            count = 10,
            label = "10 yr",
            step = "year",
            stepmode = "backward"
          ),
          list(step = "all")
        )
      ),
      # NOTE(review): kept as written; confirm plotly's range slider
      # recognises a `type` setting.
      rangeslider = list(type = "year")
    ),
    # The closing parenthesis was missing from this axis title.
    yaxis = list(title = "Age-Adjusted Incidence Rate (per 100,000)")
  )
fig
# Read the daily `*_ap` files for NY, OH and PA and the annual AQI file.
ny_ap <- read.csv("./data/daily_ap/daily_ny_ap.csv")
oh_ap <- read.csv("./data/daily_ap/daily_oh_ap.csv")
pa_ap <- read.csv("./data/daily_ap/daily_pa_ap.csv")
annual_aqi <- read.csv("./data/annual_aqi.csv")

# Open each table in the data viewer for a visual check.
view(ny_ap)
view(oh_ap)
view(pa_ap)
view(annual_aqi)
# Mean of `median_aqi` for every state and year. The result column is called
# `name` because the later merge and pivot steps refer to it by that name.
# `summarise()` replaces the superseded `summarise_at()`, and
# `.groups = "drop"` returns an ungrouped table instead of one that is still
# grouped by state.
average_med_aqi <- annual_aqi %>%
  group_by(state, year) %>%
  summarise(name = mean(median_aqi), .groups = "drop")
# Join the averaged AQI values to the state-level cancer rates by year and
# state, and give the AQI column a descriptive name.
lc_aqi_data <- rename(
  merge(average_med_aqi, state_lc, by = c("year", "state")),
  avg_med_aqi = name
)
view(lc_aqi_data)
# Spread the averaged AQI values into one column per state, rename the OH,
# NY and PA columns, and drop the ME column.
average_med_aqi_wide <- pivot_wider(
  average_med_aqi,
  names_from = state,
  values_from = name
) %>%
  rename(oh_avg_med_aqi = OH, ny_avg_med_aqi = NY, pa_avg_med_aqi = PA) %>%
  select(-ME)
view(average_med_aqi_wide)
library(plotly)
# Animated scatter plot of lung cancer incidence against average median AQI:
# one marker per state, one animation frame per year.
# (An earlier placeholder assignment to `fig1`, which referenced columns `y`
# and `f` and was overwritten before use, has been removed.)
fig1 <- lc_aqi_data %>%
  plot_ly(
    x = ~avg_med_aqi,
    y = ~age_adjusted_incidence_rate,
    color = ~state,
    frame = ~year,
    # Show only the state abbreviation when hovering over a marker.
    text = ~state,
    hoverinfo = "text",
    type = 'scatter',
    mode = 'markers'
  ) %>%
  layout(
    title = "Lung Cancer AAIR and Average Median AQI",
    xaxis = list(
      title = "Average Median AQI",
      type = "log"
    ),
    yaxis = list(title = "Age Adjusted Incidence Rate")
  )
fig1